* Load the employment panel, reading only the variables used further down.
use anon t ev vallazon1 fogvisz1 w1 wh1 wtip1 kor using "$in/admin3_alap.dta", clear

* Exclude those who were never employed: a person (anon) is dropped when
* vallazon1 is missing in every one of their records.
tempvar ever_employed
egen `ever_employed' = max(vallazon1), by(anon)
drop if `ever_employed' == .
drop `ever_employed'

* Declare the panel: anon is the unit identifier, t the time index.
xtset anon t

* First period in which each person (anon) is observed at each firm
* (vallazon1); rows without a firm id get a missing value.
egen mint1=min(t) if vallazon1!=., by(vallazon1 anon)

* Flag wage smoothing: attach value labels to the wage-type variable.
* The variable is addressed by its full name (wtip1) rather than the
* abbreviation "wtip", so the commands also work with -set varabbrev off-
* and cannot silently pick up another wtip* variable.
label define lwtip 0 "monthly" 1 "smoothed" 2 "mixed"
label values wtip1 lwtip
label var wtip1 "if wage smoothing"

* Temporarily append the supplementary long-format records; they are
* identified by a non-missing id_sor and are removed again below.
append using "/homeProspSSD/Admin3/admin3_alap_kieg_long.dta", keep(t ev vallazon id_sor)
* NOTE(review): anon is not among the variables kept from the appended file,
* so the appended rows cannot fall into any existing (vallazon1, anon) group
* and mint2 appears to equal mint1 on the original rows. Confirm whether the
* appended firm id was meant to enter the "first affiliation" calculation.
* NOTE(review): this path is hard-coded while the other inputs use $in;
* confirm whether it should be "$in/admin3_alap_kieg_long.dta".
egen mint2=min(t) if vallazon1!=., by(vallazon1 anon)
gen mint=min(mint1, mint2)
drop if id_sor!=.
drop id_sor

* start = 1 in the first period a person is observed at a given firm, else 0
* (also 0 where vallazon1, and hence mint, is missing).
gen start=mint==t

drop mint1 mint2 mint

* Add sick leave days: bring in passziv_tip1 by person-period. Records that
* exist only in the passive-spell file are not added to the panel.
merge 1:1 anon t using "$in/admin3_passziv.dta", keep(1 3) keepusing(passziv_tip1) generate(_merge)

* Every matched record must carry a non-missing value.
assert (passziv_tip1 != .) if (_merge == 3)

drop _merge
rename passziv_tip1 sick_leave_days

* add health shock information
* Build a temporary file holding the person-periods (anon, t) for which
* max_bnoSext equals 1 in the placebo base file.
preserve
use anon t max_bnoSext using "$out/placebo_base_bnoSext_2023Dec.dta", clear
keep if max_bnoSext==1
tempfile health_shocks
save `health_shocks'
restore

* The merge has no keep() option, so shock records without a counterpart in
* the panel (_merge==2) are added as extra rows.
merge 1:1 anon t using `health_shocks'

* flag if individual ever gets a health shock
* A row stems from the shock file when _merge is 2 (shock file only) or
* 3 (matched). Using max_merge>=2 therefore flags every person with at least
* one shock record, including persons whose shock periods did not match a
* panel row; the earlier condition max_merge==3 left those unflagged.
sort anon t
by anon : egen max_merge=max(_merge)
gen HS = 1 if max_merge>=2 & max_merge<.
drop _merge

* These panel variables are re-merged from the source file after the
* temporary save below.
drop vallazon1 ev w1 wh1 kor fogvisz1 max_bnoSext

sum
save "$out/HS0temp1_bnoSext_2022Sept.dta", replace

use "$out/HS0temp1_bnoSext_2022Sept.dta", clear

* Re-attach the panel variables (plus ferfi) from the source file; rows that
* exist only in the source file are not added.
merge 1:1 anon t using "$in/admin3_alap.dta", keepusing(vallazon1 ev w1 wh1 kor ferfi fogvisz1) keep(master match) nogen

* Construct EMP (employment). EMP is 1 when all four conditions below hold
* and missing otherwise; it is never set to 0. No NONEMP variable is created
* in this section.
local c_firm  (vallazon1!=.)
local c_pay   ((w1!=. & w1>0) | (wh1!=. & wh1>0))
local c_sick  (sick_leave_days<6 | sick_leave_days==.)
local c_type  (fogvisz1==201)
gen     EMP = 1 if `c_firm' & `c_pay' & `c_sick' & `c_type'

* Number of distinct persons before ...
codebook anon

* ... and after removing everybody flagged with a health shock.
drop if HS==1

codebook anon

* Flag one observation randomly within each anon.
* The draw is stored as double: in float precision distinct draws can
* collapse to the same value, and tied values are ordered arbitrarily by the
* sort, which would make the flagged row differ between runs. t is added as
* a final tie-break so the choice is fully determined by the seed.
	set seed 54321
	generate double random = uniform()
	bysort anon (random t): gen flag = _n == 1
* Relative time m: number of periods between each record and the person's
* randomly flagged record (m == 0 on the flagged record itself).

* tm holds t on the flagged record only; every anon has exactly one such
* record, so the within-person maximum spreads that value to all records.
gen tm = t if flag==1
egen mintm = max(tm), by(anon)

gen m = t - mintm
drop mintm

* Keep only persons whose kor on the flagged (m == 0) record lies in [20, 50].

* korm holds kor on the flagged record only; the within-person maximum copies
* it to all of that person's records.
gen korm = kor if flag==1
egen minkorm = max(korm), by(anon)

* Persons with a missing value on the flagged record are dropped as well.
keep if inrange(minkorm, 20, 50)
drop minkorm

codebook anon

* Restrict to the window of 24 periods before and after m == 0
* (records with system-missing m are left in place).
drop if !inrange(m, -24, 24) & m!=.

* Keep a person only if the year (ev) of the m == 0 record is at least 2009.
* yearm holds ev on the m == 0 record; the within-person maximum spreads it
* to all of the person's records. Persons with a missing year are not dropped.
gen yearm = ev if m==0
egen mintyearm = max(yearm), by(anon)
drop if mintyearm<2009
drop yearm mintyearm

codebook anon

* Restore panel order (the time-series operators used later need it).
sort anon t

* firstEMP: smallest m > 0 at which EMP == 1, copied to every record of the
* person; missing if the person has no such record.
egen firstEMPtemp = min(m) if m>0 & EMP==1, by(anon)
egen firstEMP = max(firstEMPtemp), by(anon)
drop firstEMPtemp

* Keep all anons (and all their observations) who have EMP == 1 at m == 0.
egen EMPm0 = mean(EMP) if m==0, by(anon)
egen maxEMPm0 = max(EMPm0), by(anon)
keep if (maxEMPm0 == 1)

* ABS  = 1 if the first EMP record after m == 0 is at m = 2..6,
*      = 0 if it is at m = 1, and missing otherwise.
* ABSd = that first m, for ABS == 1 persons only.
gen ABS = cond(firstEMP==1, 0, cond(inrange(firstEMP, 2, 6), 1, .))
gen ABSd = firstEMP if ABS==1

* Assign random d-s to the ABS == 0 persons in such a way that the
* distribution of d-s among controls corresponds to that of the treated:
	* 34.08% d=2
	* 30.33% d=3
	* 18.86% d=4
	* 10.31% d=5
	* 6.42% d=6

	* Diagnostics on the m == 0 records.
	tab ABS if m==0
	tab ABS if m==0 & (f2.EMP==1 | f3.EMP==1 | f4.EMP==1 | f5.EMP==1 | f6.EMP==1)
	tab ABSd if m==0

	* Create indicators for potential d-s for each ABS=0 anon: potd`x' is 1 on
	* the m == 0 record when the person has EMP == 1 x periods later.
	foreach x of numlist 2/6 {
		gen potd`x'=m==0 & f`x'.EMP==1 & ABS==0
	}

	* Choose d-s.
	* The draw is stored as double and the sort is completed with anon and t:
	* in float precision a sample of this size contains tied draws, and ties
	* are ordered arbitrarily by the sort, so the quota assignment below
	* would not be reproducible from the seed alone.
	set seed 12345
	drop random
	gen double random = runiform(0,1) if m==0 & ABS==0 & (potd2==1 | potd3==1 | potd4==1 | potd5==1 | potd6==1)
	sum anon if random!=.
	sort random anon t

	* Fill the quotas from d = 6 down to d = 3 in random order; each step only
	* considers records not yet assigned, and d = 2 takes every remaining
	* eligible record.
	* NOTE(review): the caps 31709 / 51184 / 94062 / 151351 are hard-coded;
	* presumably they were derived from the target shares above for one
	* particular data vintage. Recompute them if any input changes.
	* NOTE(review): this relies on egen seq() numbering the selected records
	* in the current (random) sort order. Confirm.
	egen c6=seq() if potd6==1 & ABS==0 & m==0
	gen dtemp=6 if c6>=1 & c6<=31709
	egen c5=seq() if potd5==1 & ABS==0 & m==0 & dtemp==.
	replace dtemp=5 if dtemp==. & c5>=1 & c5<=51184
	egen c4=seq() if potd4==1 & ABS==0 & m==0 & dtemp==.
	replace dtemp=4 if dtemp==. & c4>=1 & c4<=94062
	egen c3=seq() if potd3==1 & ABS==0 & m==0 & dtemp==.
	replace dtemp=3 if dtemp==. & c3>=1 & c3<=151351
	replace dtemp=2 if dtemp==. & potd2==1 & ABS==0 & m==0

	* Checks: distribution of assigned d-s, and eligible records left without one.
	tab dtemp
	sum anon if m==0 & ABS==0 & (potd2==1 | potd3==1 | potd4==1 | potd5==1 | potd6==1) & dtemp==.
	replace ABSd=dtemp if ABSd==.
	tab ABSd if m==0

* Keep only records that carry a d (observed or assigned).
keep if ABSd!=.

* Reduce to one record per person (the m == 0 record) and to the variables
* needed downstream.
keep if m==0
keep anon t ev ABS ABSd

sum
* -replace- matches the intermediate save earlier in this script; without it
* a re-run stops here with "file already exists" after all the work is done.
* NOTE(review): drop -replace- again if the omission was deliberate overwrite
* protection for this dated output file.
save "$out/potential_controls_updated_tlm_bnoSext2_SL5_d2_6_distrib_in_EMP_2025March19.dta", replace
